
/*** Trends - Empirical Application

	---- MAIN REGRESSIONS ----

This code estimates the effect of the compulsory school reform on IGEs for Table 2. It also creates Figures A.2 and A.6. The following steps are implemented:

1. First generation effects.
1a. Education: Regression coefficient, Correlation, Rank slope (Table 2, col 1)
1b. Income: Regression coefficient (IGE), Correlation, Rank slope (Table 2, col 2)
2. Second generation effects.
2a. Draw figure comparing intergenerational coefficient "old" vs. "young" fathers (Figure A.2)
2b. Education: Regression coefficient, Correlation, Rank slope (Table 2, col 3)
2c. Income: Regression coefficient (IGE), Correlation, Rank slope (Table 2, col 4)
3. Trends stacked around first reform year ("event study" graphs) (Figure A.6)

***************************************************************************************/

* STATA settings: close any log left open by an earlier run, then empty memory
capture log close
clear

** 1. First (reform) generation effects: 

* For each outcome (1 = education, 2 = income) this loop
*   - reloads the main data file and opens an outcome-specific log,
*   - standardizes and ranks the parent (x) and child (y) variable within cohort,
*   - restricts the sample to the reform cohorts,
*   - estimates the pooled areg specification in levels, standardized units and ranks,
*   - exports the three stored models to one CSV (Table 2, cols 1-2 per the file header).
* Relies on globals defined outside this file: path2, ver, date_string.

forval outcome = 1/2 {

	* Outcome-specific settings: tag used in file names, child variable (y), parent variable (x)
	if `outcome' == 1 {
		global outcome "edu"
		global y_var "educyrs"
		global x_var "f_educyrs70"
	}
	if `outcome' == 2 {
		global outcome "inc"
		global y_var "l_lifeinc3035"
		global x_var "f_l_lifeinc5359"
	}

	* Start every pass from the full data set (earlier passes drop observations)
	use "${path2}Dat/main_file.dta", clear

	* log file 
	capture log close
	log using "${path2}Logs/${ver}/school_reform_est_2step_gen1_${outcome}", text replace
	
	* choose sample (see dataset.do)
	keep if sample_2 == 1    
	global sampl=2

	* additional sample restriction for income
	* Note: Stata treats missing as larger than any number, so rows with a missing
	* x or y pass the first filter; missing y is removed by the keep further below
	* and missing x is dropped casewise by areg. For the same reason the second
	* line also drops rows with missing f_byear.
	if `outcome' == 2 {
		keep if ${x_var}>log(10000) & ${y_var}>log(10000)
		drop if 1970-f_byear>55		
	}

	* Generate standardized variables by cohorts
	* (x within f_byear, y within foddar; computed BEFORE the cohort/urval
	*  restrictions below, so means and s.d. refer to the wider sample)
	by f_byear , sort: egen xmean=mean(${x_var}) 
	by f_byear , sort: egen xsd=sd(${x_var})	
	gen ${x_var}_std = (${x_var}-xmean)/xsd
	drop xmean xsd

	by foddar , sort: egen ymean=mean(${y_var}) 
	by foddar , sort: egen ysd=sd(${y_var})  
	gen ${y_var}_std = (${y_var}-ymean)/ysd
	drop ymean ysd 

	* Generate ranks by cohorts: fractional rank (rank - 0.5)/n within cohort
	by f_byear , sort: egen i=rank(${x_var}) 
	by f_byear , sort: egen n=count(${x_var}) 
	gen ${x_var}_rank = (i-0.5)/n
	cap drop i n
	su ${x_var}_rank 

	by foddar , sort: egen i=rank(${y_var})
	by foddar , sort: egen n=count(${y_var})
	gen ${y_var}_rank = (i-0.5)/n
	cap drop i n
	su ${y_var}_rank 	

	* Further sample selection (keep only reform cohorts with non-missing key variables)
	keep if foddar>=1943 & foddar<=1955
	keep if experiment!=. & ${y_var}!=. & foddar!=.
	  
	keep if urval==1 		// We drop siblings etc

	* Generate interactions
	gen exp_${x_var} = experiment*${x_var}
	
	* POOLED OLS PROCEDURE
	
	* Estimate
	set matsize 11000
	di "   "
	di "1st GENERATION,"
	di "Sample " ${sampl}
	di "Outcome: ${outcome} " 
	di "Pooled OLS"
	
	** POOLED
	* Each model absorbs kommun60_ fixed effects, includes cohort dummies and lets
	* the slope on x vary by cohort and by kommun60_; the coefficient on
	* c.experiment#c.x is the reform effect on the intergenerational slope.
	
 	* Baseline
	areg ${y_var} i.foddar#c.${x_var} i.kommun60_#c.${x_var} ${x_var} experiment c.experiment#c.${x_var} i.foddar, absorb(kommun60_) vce(cluster kommun60_)
	est store gen1_smp${sampl}_${outcome}_pooled1	
		
 	* Baseline, standardized
	areg ${y_var}_std i.foddar#c.${x_var}_std i.kommun60_#c.${x_var}_std ${x_var}_std experiment c.experiment#c.${x_var}_std i.foddar , absorb(kommun60_) vce(cluster kommun60_)
	est store gen1_smp${sampl}_${outcome}_pooled1corr
	
 	* Baseline, ranks
	areg ${y_var}_rank i.foddar#c.${x_var}_rank i.kommun60_#c.${x_var}_rank ${x_var}_rank experiment c.experiment#c.${x_var}_rank i.foddar, absorb(kommun60_) vce(cluster kommun60_)
	est store gen1_smp${sampl}_${outcome}_pooled1rank

	* Export table
	* keep() must be closed: previously the missing ")" made esttab fail and the
	* bare -capture- hid the error, so no CSV was written. -capture noisily-
	* still lets the do-file continue on failure (e.g. esttab not installed) but
	* shows the error message in the log.
	capture noisily esttab gen1_smp${sampl}_${outcome}_pooled1 ///
		gen1_smp${sampl}_${outcome}_pooled1corr ///
		gen1_smp${sampl}_${outcome}_pooled1rank ///
		using "${path2}/Logs/${ver}/reg_1stgen_${outcome}_pooled_${date_string}.csv" , ///
		se csv replace nogaps page  star(* 0.10 ** 0.05 *** 0.01) ///
		keep(${x_var} c.experiment#c.${x_var} experiment ${x_var}_std c.experiment#c.${x_var}_std ${x_var}_rank c.experiment#c.${x_var}_rank)
}


** 2. Second (reform) generation effects: 

* For each outcome (1 = education, 2 = income) this loop
*   - reloads the main data file and opens an outcome-specific log,
*   - standardizes and ranks the parent (x) and child (y) variable within cohort,
*   - for education only, draws the "old" vs. "young" fathers figure (Figure A.2),
*   - restricts to children born 1966-1972 whose fathers were born 1943-1955,
*   - estimates the pooled areg specification in levels, standardized units and ranks,
*   - exports the three stored models to one CSV (Table 2, cols 3-4 per the file header).
* Relies on globals defined outside this file: path2, ver.

forval outcome = 1/2 {

	* Outcome-specific settings: tag used in file names, child variable (y), parent variable (x)
	if `outcome' == 1 {
		global outcome "edu"
		global y_var "educyrs"
		global x_var "f_educyrs90"
	}
	if `outcome' == 2 {
		global outcome "inc"
		global y_var "l_lifeinc3035"
		global x_var "f_l_lifeinc3545"
	}

	* Start every pass from the full data set (earlier passes drop observations)
	use "${path2}Dat/main_file.dta", clear

	* log file 
	capture log close
	log using "${path2}Logs/${ver}/school_reform_est_2step_gen2_${outcome}", text replace

	* choose sample (see dataset.do)
	keep if sample_2== 1
	global sampl=2

	* additional sample restriction for income
	* Note: Stata treats missing as larger than any number, so rows with a missing
	* x or y pass this filter; they are removed by the non-missing keep further below.
	if `outcome' == 2 {
		keep if ${x_var}>log(10000) & ${y_var}>log(10000)
	}
		
	* Generate standardized variables by cohorts
	* (x within f_byear, y within foddar; computed BEFORE the sample
	*  restrictions below, so means and s.d. refer to the wider sample)
	by f_byear , sort: egen xmean=mean(${x_var}) 
	by f_byear , sort: egen xsd=sd(${x_var})  	
	gen ${x_var}_std = (${x_var}-xmean)/xsd
	drop xmean xsd
	
	by foddar , sort: egen ymean=mean(${y_var})  
	by foddar , sort: egen ysd=sd(${y_var})   	
	gen ${y_var}_std = (${y_var}-ymean)/ysd
	drop ymean ysd 

	* Generate ranks by cohorts: fractional rank (rank - 0.5)/n within cohort
	by f_byear , sort: egen i=rank(${x_var})  
	by f_byear , sort: egen n=count(${x_var})  
	gen ${x_var}_rank = (i-0.5)/n
	cap drop i n
	su ${x_var}_rank 

	by foddar , sort: egen i=rank(${y_var})  
	by foddar , sort: egen n=count(${y_var})  
	gen ${y_var}_rank = (i-0.5)/n
	cap drop i n
	su ${y_var}_rank 	

	* Draw figure comparing intergen. educ. coeff. "old" vs. "young" fathers (Figure A.2)
	
	if `outcome' == 1 {
		
		* Split at the median of f_ageatbirth; observations exactly at the median
		* (and those with missing f_ageatbirth) belong to neither group.
		su f_ageatbirth , d
		scalar medianage=r(p50)
		gen youngfather=f_ageatbirth<medianage
		gen oldfather=f_ageatbirth>medianage & f_ageatbirth<.
	
		* Cohort-specific slopes for the estimation cohorts (output goes to the log only)
		reg  ${y_var} i.foddar i.foddar#c.${x_var} if foddar>=1966 & foddar<=1972
		reg  ${y_var} i.foddar i.foddar#c.${x_var} if foddar>=1966 & foddar<=1972 & youngfather==1 
		reg  ${y_var} i.foddar i.foddar#c.${x_var} if foddar>=1966 & foddar<=1972 & oldfather==1 

		* Graph
		* Plot data are parked in observation rows 1960-1972 (row number = cohort
		* year), so the data set must contain at least 1972 observations.
		foreach v in year coef coefyoung coefold lbyoung lbold ubyoung ubold {
			gen g_`v'=.
		}
		forval i=1960/1972 {
			replace g_year=`i' in `i'
			reg  ${y_var} ${x_var} if foddar==`i' 
			replace g_coef=_b[${x_var}] in `i'
			* Same regression by father group; bounds are t-based confidence limits
			* at the current -set level- (c(level)).
			foreach grp in young old {
				reg  ${y_var} ${x_var} if foddar==`i' & `grp'father==1 
				replace g_coef`grp'=_b[${x_var}] in `i'
				replace g_lb`grp'=_b[${x_var}] - _se[${x_var}]*invttail(e(df_r),0.5*(1-c(level)/100)) in `i'
				replace g_ub`grp'=_b[${x_var}] + _se[${x_var}]*invttail(e(df_r),0.5*(1-c(level)/100)) in `i'
			}
		}
		twoway ///
			(connected g_coefyoung g_year ,  color(black) lpattern(dash) msymbol(R) msize(small)) ///
			(rcap g_ubyoung g_lbyoung g_year , color(gs10) lwidth(medthin)) ///
			(connected g_coefold g_year ,  color(black) msymbol(S) msize(small)) ///
			(rcap g_ubold g_lbold g_year , color(gs10) lwidth(medthin)) ///
			, xlabel(1960(2)1972) xmtick(1960(1)1972) graphregion(color(white)) legend(order(1 3) lab(1 "Young fathers") lab(3 "Old fathers"))  ///
			xtitle("cohort") 
		graph export "${path2}/Logs/${ver}/figure_old_vs_young_${sampl}_${outcome}.eps" , replace 
		graph save "${path2}/Logs/${ver}/figure_old_vs_young_${sampl}_${outcome}.gph" , replace 
		cap drop g_*
	} 
	
	
	* Further sample selection (cohorts with representatively sampled reform parents, non-missing key variables)
	keep if ${y_var}!=. & ${x_var}!=. & f_experiment!=. 
	keep if foddar>=1966 & foddar<=1972 	
	keep if f_byear>=1943 & f_byear<=1955	
	keep if f_urval == 1 | m_urval ==1		

	* Generate interactions
	gen exp_${x_var} = f_experiment*${x_var}

	* POOLED OLS PROCEDURE

	* Estimate
	set matsize 11000
	di "   "
	di "2nd GENERATION,"
	di "Sample " ${sampl}
	di "Outcome: ${outcome} " 
	di "Pooled OLS"

	** POOLED
	* Each model absorbs f_kommun60_ fixed effects, includes f_byear dummies and
	* lets the slope on x vary by f_byear and by f_kommun60_; the coefficient on
	* c.f_experiment#c.x is the reform effect on the intergenerational slope.
	
	* Baseline
	areg ${y_var} ${x_var} f_experiment c.f_experiment#c.${x_var} i.f_byear i.f_kommun60_#c.${x_var} i.f_byear#c.${x_var} , absorb(f_kommun60_) vce(cluster f_kommun60_)	
	est store gen2_smp${sampl}_${outcome}_pooled1  
	
 	* Baseline, standardized
	areg ${y_var}_std ${x_var}_std f_experiment c.f_experiment#c.${x_var}_std i.f_byear i.f_kommun60_#c.${x_var}_std i.f_byear#c.${x_var}_std , absorb(f_kommun60_) vce(cluster f_kommun60_)	
	est store gen2_smp${sampl}_${outcome}_pooled1corr

 	* Baseline, ranks
	areg ${y_var}_rank ${x_var}_rank f_experiment c.f_experiment#c.${x_var}_rank i.f_byear i.f_kommun60_#c.${x_var}_rank i.f_byear#c.${x_var}_rank , absorb(f_kommun60_) vce(cluster f_kommun60_)	
	est store gen2_smp${sampl}_${outcome}_pooled1rank
	
	* Export table
	* keep() must be closed: previously the missing ")" made esttab fail and the
	* bare -capture- hid the error, so no CSV was written. -capture noisily-
	* still lets the do-file continue on failure (e.g. esttab not installed) but
	* shows the error message in the log.
	capture noisily esttab gen2_smp${sampl}_${outcome}_pooled1 ///
		gen2_smp${sampl}_${outcome}_pooled1corr ///
		gen2_smp${sampl}_${outcome}_pooled1rank ///
		using "${path2}/Logs/${ver}/reg_2ndgen_smp${sampl}_${outcome}_pooled.csv" , ///
		se csv replace nogaps page  star(* 0.10 ** 0.05 *** 0.01) ///
		keep(${x_var} f_experiment c.f_experiment#c.${x_var} ${x_var}_std c.f_experiment#c.${x_var}_std ${x_var}_rank c.f_experiment#c.${x_var}_rank)
}


** 3. Trends stacked around first reform year (Figure A.6)
	
	* Reload the data: this section needs cohorts outside the 1943-1955 window
	* that the earlier sections kept (pre-1943 and/or post-1955).
	use ${path2}Dat/main_file.dta, replace

	* log file
	cap log close
	log using "${path2}Logs/${ver}/school_reform_est_around_tzero", text replace

	* prepare data: cohorts strictly between 1939 and 1960, sample 2, urval==1,
	* f_ageatbirth below 33
	keep if foddar<1960 & foddar>1939
	keep if sample_2==1
	global sampl = 2
	keep if urval==1
	keep if f_ageatbirth<33

	* outcome variables: indicator for fewer than 9 years of schooling
	* (0 when educyrs is missing) and a copy of years of schooling
	gen low_educ = educyrs<9
	gen avg_educ = educyrs

	* identify first reform cohort for each municipality:
	* the earliest foddar among observations with experiment==1 in the same kommun60_
	gen reformkidinyear = foddar if experiment==1
	by kommun60_, sort: egen kommun_reform = min(reformkidinyear)
	drop reformkidinyear

	* event time relative to the first reform cohort
	* NOTE(review): norm_year is centred on firstcohort60 (expected to exist in the
	* data), not on kommun_reform computed just above; kommun_reform is only used
	* later as a sample filter - confirm this is intended.
	gen norm_year = foddar - firstcohort60

	* event-time window, written as a forvalues range
	local time "-3(1)3"

	* x-axis values -3..3 for the graphs, stored in observation rows 1-7
	* (missing in every other row)
	gen g_norm_year = _n - 4 in 1/7
	
	* Create empty holder variables for the event-study results, one set per
	* statistic (low = share below 9 years, avg = mean, variance, igcoeff =
	* intergenerational coefficient). The full-sample series also gets
	* standard-error and lower/upper-bound holders.
	* Fix: the s.e./l.b./u.b. labels are now attached to the _se/_lb/_ub variables;
	* before, all three were applied to `prefix'_educ_full_sample, which therefore
	* ended up labelled "u.b." while the bound variables had no label.
	foreach prefix in low avg variance igcoeff {
		foreach window in early late mid full {
			cap drop `prefix'_educ_`window'_sample
			gen `prefix'_educ_`window'_sample=.
		}
		label var `prefix'_educ_early_sample "Cohorts 1943-1948"
		label var `prefix'_educ_late_sample "Cohorts 1950-1957"
		label var `prefix'_educ_mid_sample "Cohorts 1946-1952"
		label var `prefix'_educ_full_sample "Full sample (cohorts 1943-1957)"

		* cap drop keeps the block re-runnable within one session
		cap drop `prefix'_educ_full_sample_se
		gen `prefix'_educ_full_sample_se=.
		label var `prefix'_educ_full_sample_se "Full sample (cohorts 1943-1957) s.e."
		cap drop `prefix'_educ_full_sample_lb
		gen `prefix'_educ_full_sample_lb=.
		label var `prefix'_educ_full_sample_lb "Full sample (cohorts 1943-1957) l.b."
		cap drop `prefix'_educ_full_sample_ub
		gen `prefix'_educ_full_sample_ub=.
		label var `prefix'_educ_full_sample_ub "Full sample (cohorts 1943-1957) u.b."
	}
	
	* Cohort windows used for each series. The "full" window additionally
	* requires kommun_reform>1945.
	local win_early "foddar>1942 & foddar<1949"
	local win_mid   "foddar>1945 & foddar<1953"
	local win_late  "foddar>1949 & foddar<1958"
	local win_full  "foddar>1942 & foddar<1960  & kommun_reform>1945"

	* For every event-time value in `time', compute each statistic per window and
	* store it in the holder row whose g_norm_year equals that event time.
	forval i=`time' {

		local at "norm_year==`i'"

		* Share with < 9 years (low) and mean years of schooling (avg)
		foreach prefix in low avg {
			* "full" must stay last: the s.e. below reads r(N) and r(sd) left
			* behind by the most recent summarize
			foreach w in early mid late full {
				sum `prefix'_educ if `at' & `win_`w''
				replace `prefix'_educ_`w'_sample=r(mean) if g_norm_year==`i'
			}
			* standard error of the mean and +/- 1.96 s.e. bounds
			replace `prefix'_educ_full_sample_se=sqrt(1/r(N))*r(sd) if g_norm_year==`i'
			replace `prefix'_educ_full_sample_lb=`prefix'_educ_full_sample - `prefix'_educ_full_sample_se*1.96 if g_norm_year==`i'
			replace `prefix'_educ_full_sample_ub=`prefix'_educ_full_sample + `prefix'_educ_full_sample_se*1.96 if g_norm_year==`i'
		}

		* Variance of years of schooling: squared s.d. for the sub-windows
		foreach w in early mid late {
			sum avg_educ if `at' & `win_`w''
			replace variance_educ_`w'_sample=r(sd)^2 if g_norm_year==`i'
		}

		* Full window: mean squared deviation from the sample mean, estimated
		* with -mean- so that a standard error is available
		sum avg_educ if `at' & `win_full'
		gen sqdev_avg_educ = (avg_educ-r(mean))^2 if `at' & `win_full'
		mean sqdev_avg_educ if `at' & `win_full'
		replace variance_educ_full_sample=_b[sqdev_avg_educ] if g_norm_year==`i'
		replace variance_educ_full_sample_se=_se[sqdev_avg_educ] if g_norm_year==`i'
		replace variance_educ_full_sample_lb=variance_educ_full_sample - variance_educ_full_sample_se*1.96 if g_norm_year==`i'
		replace variance_educ_full_sample_ub=variance_educ_full_sample + variance_educ_full_sample_se*1.96 if g_norm_year==`i'
		drop sqdev_avg_educ

		* Intergenerational coefficient: slope of educyrs on f_educyrs70.
		* "full" must stay last: the bounds below read _b/_se of the most
		* recent regression.
		foreach w in early mid late full {
			reg educyrs f_educyrs70 if `at' & `win_`w''
			replace igcoeff_educ_`w'_sample=_b[f_educyrs70] if g_norm_year==`i'
		}
		replace igcoeff_educ_full_sample_lb=_b[f_educyrs70] - _se[f_educyrs70]*1.96 if g_norm_year==`i'
		replace igcoeff_educ_full_sample_ub=_b[f_educyrs70] + _se[f_educyrs70]*1.96 if g_norm_year==`i'

	}

	* Event-study graphs (Figure A.6 per the file header): for each statistic,
	* plot the full-sample series against event time with its lower/upper bounds
	* and export it as EPS. Export paths are quoted so that ${path2} or ${ver}
	* may contain spaces.
	sort g_norm_year

	local time "-3(1)3"

	* Graph title per statistic
	local title_low      "Share with less than 9 years schooling"
	local title_avg      "Average years of schooling"
	local title_variance "Variance of years of schooling"
	local title_igcoeff  "Intergenerational schooling coefficient"

	* Export file stem per statistic
	local file_low      "share_min_educ_around_tzero"
	local file_avg      "avg_educ_around_tzero"
	local file_variance "variance_educ_around_tzero"
	local file_igcoeff  "igcoeff_educ_around_tzero"

	* The statistic name doubles as the in-memory graph name
	foreach panel in low avg variance igcoeff {
		twoway ///
			(connected `panel'_educ_full_sample g_norm_year , color(black) msymbol(T) msize(small)) ///
			(rcap `panel'_educ_full_sample_ub `panel'_educ_full_sample_lb g_norm_year, color(gs10) lwidth(medthin) legend(off)) ///
			, xlabel(`time') scheme(s2mono) graphregion(color(white)) xtitle("Years after reform") ytitle("") title("`title_`panel''") name(`panel', replace)
		qui graph export "${path2}Logs/${ver}/`file_`panel''.eps" , replace
	}

cap log close
